---
title: "text analysis on open question"
author: "Shoko Kohama, Kai Quek, Atsushi Tago"
date: "2022/11"
output: html_document
---

```{r setup, include=FALSE}
# Default chunk option: echo the source code in the rendered document unless a
# chunk overrides it with its own `echo` setting.
knitr::opts_chunk$set(echo = TRUE)
```

```{r setting, include=FALSE}
# Load library packages
#
# The workspace is intentionally NOT cleared here: `rm(list = ls())` would
# delete the objects of whoever renders or sources this document, and every
# object used below is created inside this file.
library(quanteda)
library(tidyverse)
library(readtext)
library(stringr)
library(dplyr)
library(lubridate)
library(topicmodels)
library(quanteda.textplots)
```

This is a supplementary analysis of the open-ended question asked right after the DV question. We asked the respondents what they were thinking when they answered the DV question.

Following Levy et al. (2015), who analyzed an open-ended question to uncover what drives the attitudes of the general public, we examine whether 1) reputation harm, 2) credibility harm, or 3) competence matters the most. In Japanese, reputation concerns are captured by words like "名誉", "評判" or "評価". Credibility concerns are captured by words like "信用", "信頼" or "信憑". Finally, competence concerns are captured by words like "弱腰", "負け" or "譲歩".

The following analyses of the open-ended question texts by sub-group in Experiment 1 (Japan threatens and later backs down) and Experiment 2 (China threatens and later backs down) reveal some crucial differences among the people in each group. First, those who supported the Prime Minister in Experiment 1 (Japan threatens and later backs down) care a lot about competence, and thus words like "弱腰", "負け" or "譲歩" often appear. Second, this does not happen for the group of those who opposed the Prime Minister in Experiment 1. Furthermore, this also does not happen among the people who supported the Prime Minister in Experiment 2 (China threatens and later backs down). Interestingly, those who opposed the Prime Minister in Experiment 1 do not really use any of the key words for reputation, credibility, or competence. Judging from the topic model outcome, it may be that the people in this group talk about "provocation (挑発)" and "spur (刺激)", "danger (危険)" and "risks (リスク)", and "talk (話し合い)". For Experiment 2 (China threatens and later backs down), those who supported the Prime Minister may care relatively more about reputation; those who opposed the Prime Minister seem to care about reputation and competence. Credibility may matter somewhat to those who supported the Prime Minister in Experiment 2, but the relative effect is small.

Experiment 1 (Japan threatens and later backs down): Only the sub-group that supported the Prime Minister
```{r code1, echo=F}
# Experiment 1 (Japan threatens and later backs down), supporters sub-group:
# top features, keyword-in-context listings and a 10-topic LDA model of the
# open-ended answers.

# NOTE: this absolute path is specific to the original author's machine;
# point it at your local copy of the replication data.
data1 <- read.csv(
  "C:/Users/81904/Desktop/replication/exp1_JPNthreats_backdown.csv",
  fileEncoding = "UTF-8-BOM"
)
# Bare column names are resolved inside the data frame by dplyr::filter().
# NOTE(review): the coding of q01 / q02c is not visible here -- confirm that
# joining the two conditions with OR selects the intended sub-group.
data1 <- filter(data1, q01 == "1" | q02c == "2")

# One document per respondent, built from the free-text column q03x.
full_corp1 <- corpus(data1, text_field = "q03x")
ndoc(full_corp1)

toks1 <- tokens(full_corp1, remove_punct = FALSE)

# Keep tokens made up only of full-width digits, kana and kanji, then drop
# hiragana-only tokens. padding = TRUE leaves an empty pad ("") where a token
# was removed, so the remaining tokens keep their positions for kwic().
toks1 <- tokens_select(
  toks1, "^[０-９ぁ-んァ-ヶー一-龠]+$",
  valuetype = "regex", padding = TRUE
)
toks1 <- tokens_remove(
  toks1, "^[ぁ-ん]+$",
  valuetype = "regex", padding = TRUE
)

# Most frequent features. The pad ("") left by padding = TRUE is not a word,
# so it is removed together with the uninformative terms before ranking.
mx <- dfm(toks1)
mx <- dfm_remove(mx, c("", "特に", "思う", "的", "点"))
topfeatures(mx, 20)

# Document-feature matrix for the topic model. The pad is dropped with
# dfm_remove() because the `remove` argument of dfm() is deprecated in
# quanteda >= 3. dfm_trim() then keeps features whose frequency lies between
# the 50th and 99th percentiles.
open_dfm1 <- dfm(toks1) %>%
  dfm_remove(pattern = "") %>%
  dfm_remove("^[ぁ-ん]+$", valuetype = "regex", min_nchar = 2) %>%
  dfm_trim(
    min_termfreq = 0.50, termfreq_type = "quantile",
    max_termfreq = 0.99
  )

# Keyword-in-context (first 20 hits each).
# Reputation keywords
head(kwic(toks1, "名誉"), 20)
head(kwic(toks1, "評判"), 20)
head(kwic(toks1, "評価"), 20)
# Credibility keywords
head(kwic(toks1, "信用"), 20)
head(kwic(toks1, "信頼"), 20)
head(kwic(toks1, "信憑"), 20)
# Competence keywords
head(kwic(toks1, "弱腰"), 20)
head(kwic(toks1, "負け"), 20)
head(kwic(toks1, "譲歩"), 20)

# 10-topic LDA; the seed is fixed so the fit is reproducible. The table shows
# the 10 highest-ranked terms of each topic.
set.seed(100)
lda1 <- LDA(convert(open_dfm1, to = "topicmodels"), k = 10)
get_terms(lda1, 10) %>% knitr::kable()

```

Experiment 1 (Japan threatens and later backs down): Only the sub-group that opposed the Prime Minister
```{r code2, echo=F}
# Experiment 1 (Japan threatens and later backs down), opponents sub-group:
# top features, keyword-in-context listings and a 10-topic LDA model of the
# open-ended answers.

# NOTE: this absolute path is specific to the original author's machine;
# point it at your local copy of the replication data.
data2 <- read.csv(
  "C:/Users/81904/Desktop/replication/exp1_JPNthreats_backdown.csv",
  fileEncoding = "UTF-8-BOM"
)
# Bare column names are resolved inside the data frame by dplyr::filter().
# NOTE(review): the coding of q01 / q02c is not visible here -- confirm that
# joining the two conditions with OR selects the intended sub-group.
data2 <- filter(data2, q01 == "0" | q02c == "0")

# One document per respondent, built from the free-text column q03x.
full_corp2 <- corpus(data2, text_field = "q03x")
ndoc(full_corp2)

toks2 <- tokens(full_corp2, remove_punct = FALSE)

# Keep tokens made up only of full-width digits, kana and kanji, then drop
# hiragana-only tokens. padding = TRUE leaves an empty pad ("") where a token
# was removed, so the remaining tokens keep their positions for kwic().
toks2 <- tokens_select(
  toks2, "^[０-９ぁ-んァ-ヶー一-龠]+$",
  valuetype = "regex", padding = TRUE
)
toks2 <- tokens_remove(
  toks2, "^[ぁ-ん]+$",
  valuetype = "regex", padding = TRUE
)

# Most frequent features. The pad ("") left by padding = TRUE is not a word,
# so it is removed together with the uninformative terms before ranking.
mx <- dfm(toks2)
mx <- dfm_remove(mx, c("", "特に", "思う", "的", "点"))
topfeatures(mx, 20)

# Keyword-in-context (first 20 hits each).
# Reputation keywords
head(kwic(toks2, "名誉"), 20)
head(kwic(toks2, "評判"), 20)
head(kwic(toks2, "評価"), 20)
# Credibility keywords
head(kwic(toks2, "信用"), 20)
head(kwic(toks2, "信頼"), 20)
head(kwic(toks2, "信憑"), 20)
# Competence keywords
head(kwic(toks2, "弱腰"), 20)
head(kwic(toks2, "負け"), 20)
head(kwic(toks2, "譲歩"), 20)

# Document-feature matrix for the topic model. The pad is dropped with
# dfm_remove() because the `remove` argument of dfm() is deprecated in
# quanteda >= 3. dfm_trim() then keeps features whose frequency lies between
# the 50th and 99th percentiles.
open_dfm2 <- dfm(toks2) %>%
  dfm_remove(pattern = "") %>%
  dfm_remove("^[ぁ-ん]+$", valuetype = "regex", min_nchar = 2) %>%
  dfm_trim(
    min_termfreq = 0.50, termfreq_type = "quantile",
    max_termfreq = 0.99
  )

# 10-topic LDA; the seed is fixed so the fit is reproducible. The table shows
# the 10 highest-ranked terms of each topic.
set.seed(100)
lda2 <- LDA(convert(open_dfm2, to = "topicmodels"), k = 10)
get_terms(lda2, 10) %>% knitr::kable()

```

Experiment 2 (China threatens and later backs down): Only the sub-group that supported the Prime Minister
```{r code3, echo=F}
# Experiment 2 (China threatens and later backs down), supporters sub-group:
# top features, keyword-in-context listings and a 10-topic LDA model of the
# open-ended answers. The object names data1 / toks1 / open_dfm1 / lda1 are
# reassigned here with Experiment 2 data.

# NOTE: this absolute path is specific to the original author's machine;
# point it at your local copy of the replication data.
data1 <- read.csv(
  "C:/Users/81904/Desktop/replication/exp2_CHNthreats_backdown.csv",
  fileEncoding = "UTF-8-BOM"
)
# Bare column names are resolved inside the data frame by dplyr::filter().
# NOTE(review): the coding of q01 / q02c is not visible here -- confirm that
# joining the two conditions with OR selects the intended sub-group.
data1 <- filter(data1, q01 == "1" | q02c == "2")

# One document per respondent, built from the free-text column q03x.
full_corp1 <- corpus(data1, text_field = "q03x")
ndoc(full_corp1)

toks1 <- tokens(full_corp1, remove_punct = FALSE)

# Keep tokens made up only of full-width digits, kana and kanji, then drop
# hiragana-only tokens. padding = TRUE leaves an empty pad ("") where a token
# was removed, so the remaining tokens keep their positions for kwic().
toks1 <- tokens_select(
  toks1, "^[０-９ぁ-んァ-ヶー一-龠]+$",
  valuetype = "regex", padding = TRUE
)
toks1 <- tokens_remove(
  toks1, "^[ぁ-ん]+$",
  valuetype = "regex", padding = TRUE
)

# Most frequent features. The pad ("") left by padding = TRUE is not a word,
# so it is removed together with the uninformative terms before ranking.
mx <- dfm(toks1)
mx <- dfm_remove(mx, c("", "特に", "思う", "的", "点"))
topfeatures(mx, 20)

# Keyword-in-context (first 20 hits each).
# Reputation keywords
head(kwic(toks1, "名誉"), 20)
head(kwic(toks1, "評判"), 20)
head(kwic(toks1, "評価"), 20)
# Credibility keywords
head(kwic(toks1, "信用"), 20)
head(kwic(toks1, "信頼"), 20)
head(kwic(toks1, "信憑"), 20)
# Competence keywords
head(kwic(toks1, "弱腰"), 20)
head(kwic(toks1, "負け"), 20)
head(kwic(toks1, "譲歩"), 20)

# Document-feature matrix for the topic model. The pad is dropped with
# dfm_remove() because the `remove` argument of dfm() is deprecated in
# quanteda >= 3. dfm_trim() then keeps features whose frequency lies between
# the 50th and 99th percentiles.
open_dfm1 <- dfm(toks1) %>%
  dfm_remove(pattern = "") %>%
  dfm_remove("^[ぁ-ん]+$", valuetype = "regex", min_nchar = 2) %>%
  dfm_trim(
    min_termfreq = 0.50, termfreq_type = "quantile",
    max_termfreq = 0.99
  )

# 10-topic LDA; the seed is fixed so the fit is reproducible. The table shows
# the 10 highest-ranked terms of each topic.
set.seed(100)
lda1 <- LDA(convert(open_dfm1, to = "topicmodels"), k = 10)
get_terms(lda1, 10) %>% knitr::kable()

```

Experiment 2 (China threatens and later backs down): Only the sub-group that opposed the Prime Minister
```{r code4, echo=F}
# Experiment 2 (China threatens and later backs down), opponents sub-group:
# top features, keyword-in-context listings and a 10-topic LDA model of the
# open-ended answers. The object names data2 / toks2 / open_dfm2 / lda2 are
# reassigned here with Experiment 2 data.

# NOTE: this absolute path is specific to the original author's machine;
# point it at your local copy of the replication data.
data2 <- read.csv(
  "C:/Users/81904/Desktop/replication/exp2_CHNthreats_backdown.csv",
  fileEncoding = "UTF-8-BOM"
)
# Bare column names are resolved inside the data frame by dplyr::filter().
# NOTE(review): the coding of q01 / q02c is not visible here -- confirm that
# joining the two conditions with OR selects the intended sub-group.
data2 <- filter(data2, q01 == "0" | q02c == "0")

# One document per respondent, built from the free-text column q03x.
full_corp2 <- corpus(data2, text_field = "q03x")
ndoc(full_corp2)

toks2 <- tokens(full_corp2, remove_punct = FALSE)

# Keep tokens made up only of full-width digits, kana and kanji, then drop
# hiragana-only tokens. padding = TRUE leaves an empty pad ("") where a token
# was removed, so the remaining tokens keep their positions for kwic().
toks2 <- tokens_select(
  toks2, "^[０-９ぁ-んァ-ヶー一-龠]+$",
  valuetype = "regex", padding = TRUE
)
toks2 <- tokens_remove(
  toks2, "^[ぁ-ん]+$",
  valuetype = "regex", padding = TRUE
)

# Most frequent features. The pad ("") left by padding = TRUE is not a word,
# so it is removed together with the uninformative terms before ranking.
mx <- dfm(toks2)
mx <- dfm_remove(mx, c("", "特に", "思う", "的", "点"))
topfeatures(mx, 20)

# Keyword-in-context (first 20 hits each).
# Reputation keywords
head(kwic(toks2, "名誉"), 20)
head(kwic(toks2, "評判"), 20)
head(kwic(toks2, "評価"), 20)
# Credibility keywords
head(kwic(toks2, "信用"), 20)
head(kwic(toks2, "信頼"), 20)
head(kwic(toks2, "信憑"), 20)
# Competence keywords
head(kwic(toks2, "弱腰"), 20)
head(kwic(toks2, "負け"), 20)
head(kwic(toks2, "譲歩"), 20)

# Document-feature matrix for the topic model. The pad is dropped with
# dfm_remove() because the `remove` argument of dfm() is deprecated in
# quanteda >= 3. dfm_trim() then keeps features whose frequency lies between
# the 50th and 99th percentiles.
open_dfm2 <- dfm(toks2) %>%
  dfm_remove(pattern = "") %>%
  dfm_remove("^[ぁ-ん]+$", valuetype = "regex", min_nchar = 2) %>%
  dfm_trim(
    min_termfreq = 0.50, termfreq_type = "quantile",
    max_termfreq = 0.99
  )

# 10-topic LDA; the seed is fixed so the fit is reproducible. The table shows
# the 10 highest-ranked terms of each topic.
set.seed(100)
lda2 <- LDA(convert(open_dfm2, to = "topicmodels"), k = 10)
get_terms(lda2, 10) %>% knitr::kable()

```
